In [50]:
%matplotlib inline
In [51]:
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
import matplotlib.pyplot as plt

from sgmtradingcore.analytics.metrics import flat_capital_metrics
from stratagemdataprocessing.dbutils.mongo import MongoPersister
In [52]:
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (16, 8)
In [53]:
db_name = 'backtesting'
collection_name = 'strategy_results'
In [54]:
mongo_db = MongoPersister.init_from_config(db_name, auto_connect=True)
coll = mongo_db[collection_name]
In [55]:
def to_dataframe(orders):
    settled = filter(lambda o: o['status_str'] == 'SETTLED', orders)

    cols = ['placed_time', 'pnl', 'size', 'bet_side', 'price', 'date_day', 'event_id', 'sticker', 'details']
    rcols = {'placed_time': 'dt', 'size': 'stake', 'price': 'odds', 'date_day': 'date'}

    df = pd.DataFrame(settled, columns=cols).rename(columns=rcols)

    df['is_back'] = (df['bet_side'] == 'back')
    df['capital'] = 10000
    df['reason'] = df['details'].apply(lambda d: d.get('reason'))
    df['pair_key'] = df['details'].apply(lambda d: tuple(d['pair_key']))
    df['portfolio_id'] = df['details'].apply(lambda d: d['portfolio_id'])
    df['plausible_returns'] = df['details'].apply(lambda d: d.get('plausible_returns', []))
    df['weight'] = df['details'].apply(lambda d: None if d['trade_intention']['name'] != 'OpenTradeSingleSticker' else d['signals'][0]['value'][d['trade_intention']['sticker']])

    del df['details']

    return df
In [56]:
name = 'coint'
desc = 'cochrane_orcutt'
code = 'ft12_ftps.nba'
mnemonic = 'ts.mmp_entryexit'
trading_id = '562f5bef497aee1c22000001'
config_id = '5a00cd4d9316de5683386d49'
In [57]:
rows = list(coll.find({
    'strategy_name': name,
    'strategy_desc': desc,
    'strategy_code': code,
#     'trading_user_id': trading_id,
    'mnemonic': mnemonic,
    'config_id': config_id
}))
In [59]:
df = pd.concat([
    to_dataframe(list(mongo_db['orders'].find({
        'strategy_result_id': str(r['_id'])
    }))) for r in rows if r['n_orders'] > 0
])
In [60]:
df['month'] = df.dt.dt.month

Basics:

In [61]:
metrics = flat_capital_metrics(df, groupby='month').T
metrics.to_csv('/tmp/metrics.csv')
metrics
Out[61]:
month 1 2 3 4 5 10 11 12
n_trades 15776.000000 10779.000000 16462.000000 11084.000000 1407.000000 2683.000000 18561.000000 16418.000000
n_win 8043.000000 5436.000000 8282.000000 5693.000000 622.000000 1321.000000 9057.000000 8287.000000
n_loss 7733.000000 5343.000000 8180.000000 5391.000000 785.000000 1362.000000 9504.000000 8131.000000
hit_ratio 0.509825 0.504314 0.503098 0.513623 0.442075 0.492359 0.487959 0.504751
average_trade_win 0.003481 0.003579 0.003680 0.003839 0.003858 0.003974 0.003725 0.003633
average_trade_loss 0.004810 0.004852 0.004951 0.005363 0.003870 0.004964 0.004937 0.005017
unitary_stake_return -0.109682 -0.111470 -0.123424 -0.062758 -0.236271 -0.120043 -0.158112 -0.096300
cr_trade 0.752759 0.750401 0.752442 0.755859 0.789924 0.776322 0.718901 0.737908
cr_day NaN NaN NaN 0.000743 0.004741 0.006647 NaN NaN
cum_return -9.196150 -6.470451 -10.026570 -7.059136 -0.638257 -1.512369 -13.190035 -10.692460
volatility (not annualised) 1.019385 1.077948 1.120232 1.691485 0.200952 0.367304 2.398609 1.303281
sharpe_ratio -9.021269 -6.002563 -8.950442 -4.173336 -3.176160 -4.117482 -5.499034 -8.204260
maximum_drawdown -9.027111 -6.294561 -9.558771 -6.297542 -0.633278 -1.522490 -13.107272 -10.218525
drawdown_duration (days) 30.000000 27.000000 30.000000 29.000000 21.000000 6.000000 29.000000 30.000000
maximum_runup 0.000000 0.000000 0.000000 -0.000736 0.003308 0.000000 0.000000 0.000000
runup_duration (days) 0.000000 0.000000 0.000000 1.000000 1.000000 0.000000 0.000000 0.000000
total_pnl -91961.501210 -64704.512459 -100265.700055 -70591.359123 -6382.568276 -15123.689594 -131900.345075 -106924.596459
n_trading_days 31.000000 22.000000 31.000000 29.000000 20.000000 7.000000 30.000000 31.000000
In [62]:
flat_capital_metrics(df, groupby='date')['total_pnl'].hist()
plt.title('Distribution of total daily PnL.')
plt.xlabel('PnL')
plt.ylabel('Frequency')
Out[62]:
<matplotlib.text.Text at 0x7fd04a188050>
In [63]:
flat_capital_metrics(df, groupby='event_id')['total_pnl'].hist()
plt.title('Distribution of event PnL.')
plt.xlabel('PnL')
plt.ylabel('Frequency')
Out[63]:
<matplotlib.text.Text at 0x7fd0df01f750>

Portfolio-based analysis:

In [64]:
pdf = pd.DataFrame(columns=['portfolio_id', 'pnl', 'age', 'reason']).set_index('portfolio_id')
gps = df.groupby('portfolio_id')

for g in gps.groups:
    gp = gps.get_group(g).sort_values('dt')
    pdf.loc[g] = [gp.pnl.sum(), (gp.iloc[-1]['dt'] - gp.iloc[0]['dt']).total_seconds(), gp.iloc[-1].reason]

pdf['age'] = pdf['age'].astype(np.int)
In [65]:
pdf.describe().T
Out[65]:
count mean std min 25% 50% 75% max
pnl 18545.0 -31.698801 85.302734 -3920.429093 -44.632785 -17.838743 -5.846389 186.063667
age 18545.0 382.412133 337.243879 0.000000 104.000000 387.000000 526.000000 4603.000000
In [66]:
print 'Minimum PnL:'
pdf[pdf.pnl == pdf.pnl.min()][['pnl', 'age']]
Minimum PnL:
Out[66]:
pnl age
portfolio_id
5a01101f9316de5916190182 -3920.429093 23
In [67]:
print 'Maxmimum PnL:'
pdf[pdf.pnl == pdf.pnl.max()][['pnl', 'age']]
Maxmimum PnL:
Out[67]:
pnl age
portfolio_id
5a0104f79316de5916140e5f 186.063667 402
In [68]:
axes = pdf[['pnl', 'age']].hist()[0]
plt.suptitle('Distribution of portfolio age and PnL')

axes[0].set_xlabel('Age [s]')
axes[0].set_ylabel('Frequency')

axes[1].set_xlabel('PnL')
axes[1].set_ylabel('Frequency')
Out[68]:
<matplotlib.text.Text at 0x7fd1e76a0a90>
In [69]:
pdf[pdf.age < pdf.age.quantile(0.25)]['pnl'].hist(alpha=0.75)
pdf[pdf.age > pdf.age.quantile(0.75)]['pnl'].hist(alpha=0.75)
plt.legend(['< q25', '> q75'])

plt.title('Distribution of portfolio PnLs for the upper and lower quantiles on age.')
plt.xlabel('PnL')
plt.ylabel('Frequency')
Out[69]:
<matplotlib.text.Text at 0x7fd1e4318450>
In [70]:
df.groupby('reason')['pnl'].describe().T
Out[70]:
reason                   
portfolio_conflict  count     9395.000000
                    mean        -4.527020
                    std         52.077132
                    min      -2057.865698
                    25%        -33.529660
                    50%          0.097190
                    75%         22.215077
                    max        221.875031
sl/tp               count    29624.000000
                    mean       -14.193205
                    std         91.311260
                    min      -5257.888887
                    25%        -32.899194
                    50%         -0.596818
                    75%          6.656788
                    max       2099.901080
ttl_expired         count    17061.000000
                    mean        -5.093734
                    std         47.933015
                    min      -1161.087590
                    25%        -36.798719
                    50%         -0.270000
                    75%         25.402174
                    max        240.741290
Name: pnl, dtype: float64
In [71]:
f, ax = plt.subplots(1)
gps = pdf[pdf.reason.values != None].groupby('reason')

for gid, gp in gps:
    gp.pnl.hist(alpha=0.6, label=gid)

_ = plt.legend()
In [72]:
sns.lmplot("age", "pnl", data=pdf, hue="reason", fit_reg=False, aspect=1.8)
plt.title('Distribution of portfolio PnLs as a function of age.')
plt.xlabel('Age [s]')
plt.ylabel('PnL')
Out[72]:
<matplotlib.text.Text at 0x7fd0fb5aa4d0>

Time series:

In [73]:
def load_df(s):
    from sgmarb.backtesting.data import clean_dataframe
    from stratagemdataprocessing.bookmakers.common.odds.cache import HistoricalOddsCache

    hoc = HistoricalOddsCache(parse=False)
    sdf = clean_dataframe(hoc.get('%s.BF' % s), min_matched=0)
    if sdf is not None:
        sdf['timestamp'] = pd.to_datetime(sdf.timestamp, unit='ms')
        return sdf.set_index('timestamp')
    else:
        return None
In [74]:
def do_plot(pk):
    edf = df[df.pair_key == pk].sort_values('dt')
    stickers = edf['sticker'].unique()

    sdfs = {s: load_df(s) for s in stickers}
    sdfs = {s: df for (s, df) in sdfs.iteritems() if df is not None}

    f, axes = plt.subplots(len(sdfs), sharex=True, figsize=(16, 4*len(sdfs)))
    plt.suptitle(pk)

    for i, (s, sdf) in enumerate(sdfs.iteritems()):
        mp = (sdf['bp1'] + sdf['lp1']) / 2.0
        axes[i].plot(sdf.index, mp, drawstyle='steps-post', color='k', alpha=0.5)
        axes[i].set_title(s)
        axes[i].set_ylim([1.0, min(10.0, mp.max()*1.1)])

        for pid, p in edf[edf.sticker == s].groupby('portfolio_id'):
            open_dt = p['dt'].min()
            close_dt = p['dt'].max()

            if p[p.dt == open_dt].iloc[0].is_back:
                axes[i].axvspan(open_dt, close_dt, color='red', alpha=0.1, label='Back')
            else:
                axes[i].axvspan(open_dt, close_dt, color='blue', alpha=0.1, label='Lay')
In [75]:
for pk in df.pair_key.unique()[:50]:
    do_plot(pk)